import gzip

# Extract the ids of every product whose categories include "Books".
#
# categories.txt.gz is read as a sequence of records: a line that does not
# start with a space opens a new record and is kept (stripped) as the product
# id; every following line that starts with a space contributes its first
# comma-separated field (stripped) to that record's category set.  Ids whose
# category set contains "Books" are written, one per line, to books.txt.gz.
#
# Byte literals (b"...") are ordinary str objects on Python 2 and match the
# bytes that gzip yields in binary mode on Python 3, so the comparisons below
# behave the same on both.
pr_id = buf = b"start"  # placeholders; the initial category set is empty
category = set()

books = []

# `with` guarantees the archive is closed even if reading or parsing fails.
with gzip.open("categories.txt.gz", 'r') as orfile:
    while buf != b"":
        print(len(books))  # progress: number of book ids collected so far
        buf = orfile.readline()
        if buf.startswith(b" "):
            category.add(buf.strip().split(b",")[0].strip())
            continue
        # A header line closes the previous record.  The empty string returned
        # at end-of-file also takes this path, which flushes the final record
        # before the loop condition stops the iteration.
        if b"Books" in category:
            books.append(pr_id)
        pr_id = buf.strip()
        category = set()

with gzip.open("books.txt.gz", 'w') as nnfile:
    for book in books:
        nnfile.write(book + b"\n")

#--------------------------------------------------


import gzip
# Load the ids stored in books.txt.gz into `books`, one stripped line per
# entry.  Iterating over the file stops at end-of-file, so the empty read that
# signals EOF is never appended as a bogus trailing id, and the `with` block
# closes the archive when reading finishes or fails.
books = []
with gzip.open("books.txt.gz", 'r') as nnfile:
    for buf in nnfile:
        books.append(buf.strip())
        # Progress output; 929,264 is hard-coded, presumably the expected
        # number of ids (TODO confirm).  The double space reproduces what the
        # Python 2 statement `print n, " out of ..."` emitted.
        print("%d  out of 929,264 read" % len(books))




